{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import yaml\n",
    "import os\n",
    "import logging\n",
    "import plotly\n",
    "import plotly.express as px\n",
    "import plotly.graph_objects as go\n",
    "import pandas as pd\n",
    "log = logging.getLogger(__name__)\n",
    "log.setLevel(logging.INFO)\n",
    "if not log.handlers:\n",
    "    ch = logging.StreamHandler()\n",
    "    ch.setLevel(logging.INFO)\n",
    "    ch.setFormatter(logging.Formatter('%(levelname)s - %(message)s'))\n",
    "    log.addHandler(ch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse experiment yaml file\n",
    "experiments_path=\"../experiments/regression_test.yaml\"\n",
    "\n",
    "# Get experiment information from yaml file.\n",
    "experiment_params = yaml.load(open(experiments_path))\n",
    "\n",
    "regression_tests_dir = os.path.expandvars(experiment_params['regression_tests_dir'])\n",
    "\n",
    "datasets_to_run = experiment_params['datasets_to_run']\n",
    "regression_params = experiment_params['regression_parameters']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Retrieve stats, if they are not there, try to collect them:\n",
    "def collect_stats(full_stats_path, regression_params, regression_tests_dir, datasets_to_run):\n",
    "    # TODO(Toni): recollection of results should be automatic by looking for results.yaml files in the\n",
    "    # regression_tests_dir file system.\n",
    "    # Collect all yaml results for a given parameter name:\n",
    "    stats = dict()\n",
    "    for regression_param in regression_params:\n",
    "        # Redirect to param_name_value dir param_name = regression_param['name']\n",
    "        param_name = regression_param['name']\n",
    "        stats[param_name] = dict()\n",
    "        for param_value in regression_param['values']:\n",
    "            results_dir = os.path.join(regression_tests_dir, param_name, str(param_value))\n",
    "            # Redirect to modified params_dir\n",
    "            params_dir = os.path.join(results_dir, 'params')\n",
    "            stats[param_name][param_value] = dict()\n",
    "            for dataset in datasets_to_run:\n",
    "                dataset_name = dataset['name']\n",
    "                pipelines_to_run = dataset['pipelines']\n",
    "                stats[param_name][param_value][dataset_name] = dict()\n",
    "                for pipeline in pipelines_to_run:\n",
    "                    results_file = os.path.join(results_dir, dataset_name, pipeline, \"results.yaml\")\n",
    "                    if os.path.isfile(results_file):\n",
    "                        stats[param_name][param_value][dataset_name][pipeline] = yaml.load(open(results_file,'r'))\n",
    "                    else:\n",
    "                        log.warning(\"Could not find results file: {}. Adding cross to boxplot...\".format(results_file))\n",
    "                        stats[param_name][param_value][dataset_name][pipeline] = False\n",
    "    \n",
    "    # Save all stats in regression tests root directory for future usage.\n",
    "    with open(full_stats_path, 'w') as outfile:\n",
    "        outfile.write(yaml.dump(stats))\n",
    "    return stats\n",
    "\n",
    "full_stats_path = os.path.join(regression_tests_dir, \"all_stats.yaml\")\n",
    "stats = dict()\n",
    "if os.path.isfile(full_stats_path):\n",
    "    log.info(\"Found existent stats. Opening full stats from:\" + full_stats_path)\n",
    "    stats = yaml.load(open(full_stats_path))\n",
    "else:\n",
    "    log.info(\"Collecting full stats.\")\n",
    "    stats = collect_stats(full_stats_path, regression_params, regression_tests_dir, datasets_to_run)\n",
    "    \n",
    "# Push to the cloud?!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Store stats in a tidy Pandas DataFrame # TODO(Toni): this should be done in the evaluation_lib.py script...\n",
    "def listify_regression_stats(stats):\n",
    "    \"\"\" Makes a list of lists out of the stats (for easy conversion into pandas dataframe) \"\"\"\n",
    "    stats_list = []\n",
    "    for param_name in stats:\n",
    "        for param_value in stats[param_name]:\n",
    "            for dataset_name in stats[param_name][param_value]:\n",
    "                for pipeline in stats[param_name][param_value][dataset_name]:\n",
    "                    result = stats[param_name][param_value][dataset_name][pipeline]\n",
    "                    if result != False:\n",
    "                        result = result['absolute_errors'].np_arrays['error_array']\n",
    "                        stats_list.append([param_name, param_value, dataset_name, pipeline, result])\n",
    "    return stats_list\n",
    "\n",
    "# Create or load Pandas DataFrame\n",
    "df = pd.DataFrame()\n",
    "all_stats_pickle_dir = os.path.join(regression_tests_dir, 'all_stats.pkl')\n",
    "if os.path.isfile(all_stats_pickle_dir):\n",
    "    log.info(\"Found existent pickle file. Opening pickled stats from:\" + all_stats_pickle_dir)\n",
    "    df= pd.read_pickle(all_stats_pickle_dir)\n",
    "else:\n",
    "    log.info(\"Creating dataframe stats.\")\n",
    "    df = pd.DataFrame.from_records(listify_regression_stats(stats))\n",
    "    df.columns = ['Param Name', 'Param Value', 'Dataset Name', 'Pipe Type', 'ATE errors']\n",
    "    df.set_index(['Param Name', 'Dataset Name'], inplace = True)\n",
    "    \n",
    "    # Save dataframe as pickle for future use\n",
    "    #df.to_pickle(all_stats_pickle_dir)\n",
    "\n",
    "# Print df\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "def regression_boxplot(param_name, dataset_name, tidy):\n",
    "    tidy.set_index(['Param Value', 'Pipe Type'], inplace = True)\n",
    "    tidy_2 = tidy['ATE errors'].apply(lambda x: pd.Series(x)).stack().reset_index(level=2, drop=True).to_frame('ATE errors')\n",
    "    tidy_2.reset_index(level=['Pipe Type', 'Param Value'], drop=False, inplace=True)\n",
    "    fig = px.box(tidy_2, x='Param Value', y=\"ATE errors\", points=\"all\", color=\"Pipe Type\")\n",
    "\n",
    "    fig.update_layout(\n",
    "    title=go.layout.Title(\n",
    "        text=\"Dataset: \" + dataset_name\n",
    "    ),\n",
    "    xaxis=go.layout.XAxis(\n",
    "        title=go.layout.xaxis.Title(\n",
    "            text=param_name\n",
    "        )\n",
    "    ),\n",
    "    yaxis=go.layout.YAxis(\n",
    "        title=go.layout.yaxis.Title(\n",
    "            text=\"ATE [m]\"\n",
    "            ),\n",
    "        rangemode='tozero'\n",
    "        ),\n",
    "    template='plotly_white'\n",
    "    )\n",
    "    return fig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate figures\n",
    "figures = [regression_boxplot(x, y, df.loc[x].loc[[y]]) for x in df.index.levels[0] for y in df.index.levels[1]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show figures\n",
    "for figure in figures:\n",
    "    figure.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import plotly.io as pio\n",
    "pio.orca.status\n",
    "plotly.io.orca.config.executable = 'venv/bin/orca-server'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Save figures\n",
    "if not os.path.exists(\"figures\"):\n",
    "    os.mkdir(\"figures\")\n",
    "for fig in figures:\n",
    "    plotly.offline.plot(fig, filename='figures/regression_test_' + fig.layout.title.text + '_' + fig.layout.xaxis.title.text + '.html')\n",
    "\n",
    "#for figure in figures:\n",
    "#    figure.write_image(\"figures/\"+ figure.layout.title.text + \".svg\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import chart_studio\n",
    "import chart_studio.plotly as py\n",
    "import chart_studio.tools as tls\n",
    "import plotly.graph_objects as go\n",
    "from chart_studio.grid_objs import Column, Grid\n",
    "\n",
    "from datetime import datetime as dt\n",
    "import numpy as np\n",
    "from IPython.display import IFrame\n",
    "\n",
    "upload_plots_online = True\n",
    "if upload_plots_online:\n",
    "    for fig in figures:\n",
    "        py.iplot(fig, filename='regression_test_' + fig.layout.title.text + '_' + fig.layout.xaxis.title.text + '.html', world_readable=True, auto_open=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def url_to_iframe(url, text=True):\n",
    "    html = ''\n",
    "    # style\n",
    "    html += '''<head>\n",
    "    <style>\n",
    "    div.textbox {\n",
    "        margin: 30px;\n",
    "        font-weight: bold;\n",
    "    }\n",
    "    </style>\n",
    "    </head>'\n",
    "    '''\n",
    "    # iframe\n",
    "    html += '<iframe src=' + url + '.embed#{} width=750 height=400 frameBorder=\"0\"></iframe>'\n",
    "    if text:\n",
    "        html += '''<body>\n",
    "        <div class=\"textbox\">\n",
    "            <p>Click on the presentation above and use left/right arrow keys to flip through the slides.</p>\n",
    "        </div>\n",
    "        </body>\n",
    "        '''\n",
    "    return html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.15rc1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
